\relax 
\ifx\hyper@anchor\@undefined
\global \let \oldcontentsline\contentsline
\gdef \contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
\global \let \oldnewlabel\newlabel
\gdef \newlabel#1#2{\newlabelxx{#1}#2}
\gdef \newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
\AtEndDocument{\let \contentsline\oldcontentsline
\let \newlabel\oldnewlabel}
\else
\global \let \hyper@last\relax 
\fi

\select@language{english}
\@writefile{toc}{\select@language{english}}
\@writefile{lof}{\select@language{english}}
\@writefile{lot}{\select@language{english}}
\citation{Markelic04reinforcementlearning}
\citation{Sutton98}
\citation{Barto90learningand}
\citation{Howard60}
\citation{Littman94}
\citation{Sutton98}
\citation{Sutton98}
\@writefile{toc}{\contentsline {section}{\numberline {1}Introduction}{2}{section.1}}
\newlabel{sec:intro}{{1}{2}{Introduction\relax }{section.1}{}}
\@writefile{toc}{\contentsline {section}{\numberline {2}Foundations of Reinforcement Learning}{2}{section.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.1}The main idea behind reinforcement learning}{2}{subsection.2.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {1}{\ignorespaces The model of the agent and its environment for reinforcement learning (taken from \cite  {Sutton98})}}{3}{figure.1}}
\newlabel{agentWorldModel}{{1}{3}{The model of the agent and its environment for reinforcement learning (taken from \cite {Sutton98})\relax }{figure.1}{}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.2}Important problems}{3}{subsection.2.2}}
\@writefile{toc}{\contentsline {paragraph}{Problem of Exploration and Exploitation}{3}{section*.2}}
\@writefile{toc}{\contentsline {paragraph}{The prediction and control problems}{3}{section*.3}}
\@writefile{toc}{\contentsline {paragraph}{Partial observability problem}{4}{section*.4}}
\@writefile{toc}{\contentsline {paragraph}{Curse of Dimensionality}{4}{section*.5}}
\@writefile{toc}{\contentsline {paragraph}{Credit Assignment Problem}{4}{section*.6}}
\@writefile{toc}{\contentsline {paragraph}{Non-stationary environments}{4}{section*.7}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.3}Value functions and policies}{4}{subsection.2.3}}
\citation{Sutton98}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.3.1}$\epsilon $-greedy policy}{6}{subsubsection.2.3.1}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.4}Distinction from other machine learning approaches}{6}{subsection.2.4}}
\citation{Howard60}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.5}The Markov Property and Markov Decision Processes}{7}{subsection.2.5}}
\citation{Sutton98}
\newlabel{policyIteration}{{2.6.1}{8}{Policy Iteration\relax }{section*.8}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces The principle of policy iteration}}{8}{figure.2}}
\@writefile{toc}{\contentsline {subsection}{\numberline {2.6}How to find optimal policies?}{8}{subsection.2.6}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.6.1}Dynamic Programming}{8}{subsubsection.2.6.1}}
\@writefile{toc}{\contentsline {paragraph}{Policy Iteration}{8}{section*.8}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.6.2}Monte Carlo Methods}{8}{subsubsection.2.6.2}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces Policy Iteration}}{9}{algorithm.1}}
\newlabel{policyIterationAlgorithm}{{1}{9}{Policy Iteration\relax }{algorithm.1}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.6.3}Temporal Difference Learning}{9}{subsubsection.2.6.3}}
\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Initial situation of the grid world with rewards for the actions}}{10}{figure.3}}
\newlabel{firstPolicyFigure}{{3}{10}{Initial situation of the grid world with rewards for the actions\relax }{figure.3}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Situation after one value update}}{10}{figure.4}}
\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Situation after all value updates}}{10}{figure.5}}
\@writefile{lof}{\contentsline {figure}{\numberline {6}{\ignorespaces Situation after the policy change}}{10}{figure.6}}
\@writefile{lof}{\contentsline {figure}{\numberline {7}{\ignorespaces The algorithm terminates after the final value update}}{10}{figure.7}}
\newlabel{lastPolicyFigure}{{7}{10}{The algorithm terminates after the final value update\relax }{figure.7}{}}
\@writefile{toc}{\contentsline {paragraph}{The Q-Learning algorithm}{10}{section*.9}}
\citation{Sutton98}
\@writefile{loa}{\contentsline {algorithm}{\numberline {2}{\ignorespaces Monte Carlo Algorithm for finding an optimal policy}}{11}{algorithm.2}}
\newlabel{monteCarlo}{{2}{11}{Monte Carlo Methods\relax }{algorithm.2}{}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {3}{\ignorespaces Q-Learning algorithm}}{11}{algorithm.3}}
\newlabel{qlearningAlgorithm}{{3}{11}{The Q-Learning algorithm\relax }{algorithm.3}{}}
\@writefile{toc}{\contentsline {paragraph}{Sarsa}{11}{section*.10}}
\citation{Littman94}
\citation{survey1}
\citation{survey2}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {2.6.4}Eligibility traces as an improvement of reinforcement learning algorithms}{12}{subsubsection.2.6.4}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {4}{\ignorespaces SARSA($\lambda $)}}{12}{algorithm.4}}
\newlabel{sarsaLambda}{{4}{12}{Eligibility traces as an improvement of reinforcement learning algorithms\relax }{algorithm.4}{}}
\citation{owen95}
\@writefile{toc}{\contentsline {section}{\numberline {3}Multi-agent reinforcement learning}{13}{section.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.1}Problems of multi-agent reinforcement learning}{13}{subsection.3.1}}
\@writefile{lof}{\contentsline {figure}{\numberline {8}{\ignorespaces Categorization of multi-agent reinforcement learning algorithms}}{13}{figure.8}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}The framework of Markov Games}{13}{subsection.3.2}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.2.1}Finding optimal policies in Markov games}{14}{subsubsection.3.2.1}}
\citation{Littman94}
\citation{Littman94}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}The Minimax-Q learning algorithm}{15}{subsection.3.3}}
\@writefile{toc}{\contentsline {subsection}{\numberline {3.4}Soccer as an application of the Minimax-Q algorithm}{15}{subsection.3.4}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.1}Game description and rules}{15}{subsubsection.3.4.1}}
\@writefile{loa}{\contentsline {algorithm}{\numberline {5}{\ignorespaces The Minimax-Q algorithm (adapted from \cite  {Littman94})}}{16}{algorithm.5}}
\newlabel{minimaxQ}{{5}{16}{The Minimax-Q learning algorithm\relax }{algorithm.5}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.2}Training and Testing}{16}{subsubsection.3.4.2}}
\citation{Tesauro92}
\@writefile{lof}{\contentsline {figure}{\numberline {9}{\ignorespaces Initial situation}}{17}{figure.9}}
\newlabel{initialSetup}{{9}{17}{Initial situation\relax }{figure.9}{}}
\@writefile{lof}{\contentsline {figure}{\numberline {10}{\ignorespaces Situation that requires a probabilistic policy}}{17}{figure.10}}
\newlabel{probPolicyRequired}{{10}{17}{Situation that requires a probabilistic policy\relax }{figure.10}{}}
\@writefile{toc}{\contentsline {subsubsection}{\numberline {3.4.3}Results}{17}{subsubsection.3.4.3}}
\citation{Sutton98}
\citation{Littman94}
\@writefile{toc}{\contentsline {section}{\numberline {4}Related Work}{18}{section.4}}
\newlabel{sec:relatedwork}{{4}{18}{Related Work\relax }{section.4}{}}
\@writefile{toc}{\contentsline {section}{\numberline {5}Conclusion and Outlook}{18}{section.5}}
\newlabel{sec:conclusion_outlook}{{5}{18}{Conclusion and Outlook\relax }{section.5}{}}
\citation{*}
\bibstyle{alpha}
\bibdata{sempaper}
\bibcite{Arai00multi-agentreinforcement}{ASP00}
\bibcite{survey1}{BBD06}
\bibcite{survey2}{BBDS08}
\bibcite{Barto90learningand}{BSW90}
\bibcite{Crites96improvingelevator}{CB96}
\bibcite{Howard60}{How60}
\bibcite{learningContest}{lea}
\bibcite{Littman94}{Lit94}
\bibcite{Littman01}{Lit01}
\bibcite{Markelic04reinforcementlearning}{Mar04}
\bibcite{owen95}{Owe95}
\bibcite{Samuel59}{Sam59}
\bibcite{Sutton98}{SB98}
\bibcite{Tan93multi-agentreinforcement}{Tan93}
\bibcite{Tuyls05multi-agentrelational}{TCR{$^{+}$}05}
\bibcite{Tesauro92}{Tes92}
\bibcite{Tesauro02programmingbackgammon}{Tes02}
